###Illustration Code for FE in Rare Events Data - Cook, Hays, Franzese 2018
###7/31/2018

#setwd("")
#sink("pmlfe_illustration_log.txt")

library(ggplot2)
library(foreign)
library(lme4)
library(survival)
library(plm)
library(MASS)
library(mfx)
library(dummies)
library(prediction)
library(margins)
library(brglm)
library(psych)

rm(list = ls())

#Import data & create dummy for event-experiencing units  
FL <- read.dta("FL_rep_old.dta")
FL$Oil <- factor(FL$Oil)
FL$warl <- factor(FL$warl)
FL$instab <- factor(FL$instab)
FL$nwstate <- factor(FL$nwstate)
FL$ncontig <- factor(FL$ncontig)

y_panel <- aggregate(onset ~ ccode, FL, mean)

y_panel$censor <- ifelse(y_panel$onset == 0, 1, 0)
y_panel$uncensor <- ifelse(y_panel$onset > 0, 1, 0)

for (i in 1:nrow(y_panel)) {
  FL$censor[FL$ccode == y_panel$ccode[i]] <- y_panel$censor[i]
}

FL$uncensor <- 1- FL$censor
FL$ccode_un <- FL$ccode*FL$uncensor

FL2 <- FL[ which(FL$uncensor=='1'), ]


#Model estimation
model1 <- glm(onset ~ warl + gdpenl + lpopl1 +lmtnest + ncontig + Oil + nwstate + instab + polity2l + ethfrac + relfrac , family=binomial(link="logit"),data = FL)
model2 <- glmer(onset ~ warl + gdpenl + lpopl1 +lmtnest + ncontig + Oil + nwstate + instab + polity2l + ethfrac + relfrac + (1|ccode), data = FL, family = binomial(link = "logit"), control = glmerControl(optimizer = "bobyqa"),nAGQ = 10)
model3 <- glm(onset ~ warl + gdpenl + lpopl1  + ncontig + Oil + nwstate + instab + polity2l +  factor(ccode), data = FL2, family = binomial(link = "logit"))
model4 <- brglm(onset ~ warl + gdpenl + lpopl1  + ncontig + Oil + nwstate + instab + polity2l + factor(ccode_un), family=binomial(link="logit"), method = "brglm.fit", p1 = T, data = FL)
model5 <- clogit(onset ~ warl + gdpenl + lpopl1  + ncontig + Oil + nwstate + instab + polity2l + strata(ccode), data = FL)

#Plot coefficient results (Figure 1)
pdf("Figure1.pdf") 
  model1Frame <- data.frame(Variable = rownames(summary(model1)$coef),
                            Coefficient = summary(model1)$coef[, 1],
                            SE = summary(model1)$coef[, 2],
                            Model = "Pooled")
  model2Frame <- data.frame(Variable = rownames(summary(model2)$coef),
                            Coefficient = summary(model2)$coef[, 1],
                            SE = summary(model2)$coef[,2],
                            Model = "Random Effects")
  model3Frame <- data.frame(Variable = rownames(summary(model3)$coef)[2:9],
                            Coefficient = summary(model3)$coef[2:9, 1],
                            SE = summary(model3)$coef[2:9, 2],
                            Model = "UNC-FE")
  model4Frame <- data.frame(Variable = rownames(summary(model4)$coef)[2:9],
                            Coefficient = summary(model4)$coef[2:9, 1],
                            SE =  summary(model4)$coef[2:9, 2],
                            Model = "PML-FE") 

  allModelFrame <- data.frame(rbind(model4Frame, model3Frame, model2Frame, model1Frame)) 
  allModelFrame <- allModelFrame[-c(17,29), ]
  allModelFrame$Variable <- factor(allModelFrame$Variable,levels = c("relfrac", "ethfrac", "polity2l", "instab1","nwstate1","Oil1","ncontig1","lmtnest","lpopl1","gdpenl","warl1"))
  
  interval1 <- -qnorm((1-0.9)/2)  # 90% multiplier
  interval2 <- -qnorm((1-0.95)/2)  # 95% multiplier
  
  zp1 <- ggplot(allModelFrame, aes(colour = Model)) +  scale_colour_grey(start = 0, end = 0.8) 
  zp1 <- zp1 + geom_hline(yintercept = 0, colour = gray(1/2), lty = 2)
  zp1 <- zp1 + geom_linerange(aes(x = Variable, ymin = Coefficient - SE*interval1,
                                  ymax = Coefficient + SE*interval1),
                              lwd = 1, position = position_dodge(width = 0.9))
  zp1 <- zp1 + geom_linerange(aes(x = Variable,  ymin = Coefficient - SE*interval2,
                                  ymax = Coefficient + SE*interval2),
                              lwd = 1/2, position = position_dodge(width = 0.9))
  zp1 <- zp1 + geom_pointrange(aes(x = Variable, y = Coefficient, ymin = Coefficient - SE*interval2,
    ymax = Coefficient + SE*interval2, shape = Model),
    lwd = 1/2, position = position_dodge(width = 0.9)) + scale_shape(solid = TRUE)
  zp1 <- zp1 + coord_flip() + theme_bw() +theme(axis.text=element_text(size=14,face="bold"),
                                                axis.title=element_text(size=14,face="bold")) 
  zp1 <- zp1 + ggtitle("") + xlab("") + ylab("Coefficient Estimates") + scale_size(guide = 'none') + guides(colour = guide_legend(reverse=TRUE)) + guides(shape = guide_legend(reverse=TRUE)) + theme(legend.text=element_text(size=14,face="bold")) 
  zp1 
dev.off()  

#Plot marginal effect results (Figure 2)
pdf("Figure2.pdf") 
  mfx.pool <- margins(model1)
  ##Note: since the margins command in R does not currently support glmer, the RE results are generated using illustration_margins_re.do in Stata## 
  #mfx.re  <- margins(model2)
  mfx.uncfe  <- margins(model3)
  mfx.pmlfe <- margins(model4)
  
  summary(mfx.pool)[,2]
  summary(mfx.uncfe)[65:72,2]
  summary(mfx.pmlfe)[66:73,2]
  
  model1Framefx <- data.frame(Variable = summary(mfx.pool)[,1],
                            Coefficient = summary(mfx.pool)[,2],
                            SE = summary(mfx.pool)[,3],
                            Model = "Pooled")
  ##Note: since the margins command in R does not currently support glmer, the RE results are generated using illustration_margins_re.do in Stata## 
  model2Framefx <- data.frame(Variable = summary(mfx.pool)[,1],
                            Coefficient = c(0.0027,-0.0055,0.0117,0.0035,0.0042,0.0081,0.0583,0.0187,0.0003,0.0046,-0.0120),
                            SE = c(0.00596,0.00124,0.0053,0.0014,0.0012,0.0057,0.0203,0.008,0.0003,0.008,0.0032),
                            Model = "Random Effects")
  model3Framefx <- data.frame(Variable = summary(mfx.uncfe)[65:72,1],
                            Coefficient = summary(mfx.uncfe)[65:72,2],
                            SE = summary(mfx.uncfe)[65:72,3],
                            Model = "UNC-FE")
  model4Framefx <- data.frame(Variable = summary(mfx.pmlfe)[66:73,1],
                            Coefficient = summary(mfx.pmlfe)[66:73,2],
                            SE =  summary(mfx.pmlfe)[66:73,3],
                            Model = "PML-FE") 
  
  
  allModelFramefx <- data.frame(rbind(model4Framefx, model3Framefx, model2Framefx, model1Framefx))  # etc.
  allModelFramefx$Variable <- factor(allModelFramefx$Variable,levels = c("relfrac", "ethfrac", "polity2l", "instab1","nwstate1","Oil1","ncontig1","lmtnest","lpopl1","gdpenl","warl1"))
  
  interval1 <- -qnorm((1-0.9)/2)  # 90% multiplier
  interval2 <- -qnorm((1-0.95)/2)  # 95% multiplier
  
  mep <- ggplot(allModelFramefx, aes(colour = Model)) +  scale_colour_grey(start = 0, end = 0.8) 
  mep <- mep + geom_hline(yintercept = 0, colour = gray(1/2), lty = 2)
  mep <- mep + geom_linerange(aes(x = Variable, ymin = Coefficient - SE*interval1,
                                  ymax = Coefficient + SE*interval1),
                              lwd = 1, position = position_dodge(width = 0.9))
  mep <- mep + geom_linerange(aes(x = Variable,  ymin = Coefficient - SE*interval2,
                                  ymax = Coefficient + SE*interval2),
                              lwd = 1/2, position = position_dodge(width = 0.9))
  mep <- mep + geom_pointrange(aes(x = Variable, y = Coefficient, ymin = Coefficient - SE*interval2,
                                   ymax = Coefficient + SE*interval2, shape = Model),
                               lwd = 1/2, position = position_dodge(width = 0.9)) + scale_shape(solid = TRUE)
  mep <- mep + coord_flip() + theme_bw() +theme(axis.text=element_text(size=14,face="bold"),
                                                axis.title=element_text(size=14,face="bold")) 
  mep <- mep + ggtitle("") + xlab("") + ylab("Average Marginal Effects") + scale_size(guide = 'none') + guides(colour = guide_legend(reverse=TRUE)) + guides(shape = guide_legend(reverse=TRUE)) + theme(legend.text=element_text(size=14,face="bold")) 
  mep  
dev.off()
  
#Online Appendix II 
  
##Elements of Table 1 
online_appII_t1_c1 <- cbind(summary(model1)$coef[2:12, 1], summary(model1)$coef[2:12, 2])  
online_appII_t1_c2 <- cbind(summary(model2)$coef[2:12, 1], summary(model2)$coef[2:12, 2])
online_appII_t1_c3 <- cbind(summary(model3)$coef[2:9, 1], summary(model3)$coef[2:9, 2])
online_appII_t1_c4 <- cbind(summary(model4)$coef[2:9, 1], summary(model4)$coef[2:9, 2])
online_appII_t1_c5 <- cbind(model5$coef[1:8], sqrt(diag(model5$var)))
online_appII_t1_c1
online_appII_t1_c2
online_appII_t1_c3
online_appII_t1_c4
online_appII_t1_c5

##Elements of Table 2 
online_appII_t2_c1 <- cbind(summary(mfx.pool)[,2], summary(mfx.pool)[,3])
online_appII_t2_c2 <- cbind(model2Framefx[,2], model2Framefx[,3])
online_appII_t2_c3 <- cbind(summary(mfx.uncfe)[65:72,2], summary(mfx.uncfe)[65:72,3])
online_appII_t2_c4 <- cbind(summary(mfx.pmlfe)[66:73,2], summary(mfx.pmlfe)[66:73,3])
online_appII_t2_c1
online_appII_t2_c2
online_appII_t2_c3
online_appII_t2_c4

#sink()
  